Install the following libraries before running the TDA pipeline.
#install.packages(c("TDA", "Matrix"))
library("Matrix") #package for sparse matrices
library("TDA")
working_dir_path <- setwd("~/Documents/GitHub/TDA-Microscopy-Pipeline/TDA/general-pipeline")
source("utilities.R")
You only need to run this block once
dir.create("saved_computations", showWarnings = FALSE)
saved_computations <- concat_path(working_dir_path, "saved_computations")
#choose names for two data groups
groups <- list("group_0", "group_25")
for (i in 1:length(groups)){
dir.create(concat_path(saved_computations,groups[i]), showWarnings = FALSE)
}
You only need to run this block once; after running it once comment it out
#max_values <- data.frame(matrix(ncol = 2, nrow = 0))
#colnames(max_values) <- c("maxbirth", "maxdeath")
#write.csv(max_values, concat_path(saved_computations,"max-values.csv"), row.names=FALSE)
If you are using different cell type identification method, have separate csv files for cells of distinct types
#set 'yes' if you use our cell type identification method;
#set 'no' if you use different cell type identification method; in this case, have separate csv files for cells of distinct types
our_cell_type_identification <- "yes"
if (our_cell_type_identification == "yes"){
#specify location path of the file with the cell type codes and their colors
cell_types_path <- "~/Documents/GitHub/TDA-Microscopy-Pipeline/Identification/cell_colors.csv"
#read all cell type codes from your designated file
cell_types_file <- read.csv(cell_types_path)
all_cell_types <- cell_types_file$Code
# indicate cell types for only including specific cells
cell_types <- all_cell_types[2] # this can either be a single value or a vector
index <- NaN # default index for cell types is the last column
sprintf("Analysis will be done for cells of type: %s", paste(cell_types, collapse=", "))
}
## [1] "Analysis will be done for cells of type: 1"
#this will include all cells
if (our_cell_type_identification == "no"){
print("You are using a different cell type identification method. Analysis will be done for all cells.")
cell_types <- NaN
index <- NaN
}
#choose a group folder where to save computations
group <- "group_25"
# specify location path of the csv files of that group
csv_dir_path <- "~/Documents/GitHub/TDA-Microscopy-Pipeline/Discretized-images/25"
We use Delaunay complex filtration to compute persistence diagrams
compute_PDs(csv_dir_path, group, saved_computations, cell_types, index)
## [1] "Processing file 25_dox_1.csv"
## [1] "Processing file 25_dox_10.csv"
## [1] "Processing file 25_dox_11.csv"
## [1] "Processing file 25_dox_12.csv"
## [1] "Processing file 25_dox_13.csv"
## [1] "Processing file 25_dox_14.csv"
## [1] "Processing file 25_dox_15.csv"
## [1] "Processing file 25_dox_16.csv"
## [1] "Processing file 25_dox_2.csv"
## [1] "Processing file 25_dox_3.csv"
## [1] "Processing file 25_dox_4.csv"
## [1] "Processing file 25_dox_5.csv"
## [1] "Processing file 25_dox_6.csv"
## [1] "Processing file 25_dox_7.csv"
## [1] "Processing file 25_dox_8.csv"
## [1] "Processing file 25_dox_9.csv"
## [1] "Persistence diagrams are saved in ~/Documents/GitHub/TDA-Microscopy-Pipeline/TDA/general-pipeline/saved_computations/group_25/group_25_PDs.RData"
This is crucial for VII block and will ensure that all plots from both groups have the same scale
#Before running this block, compute persistence diagrams for all data groups (i.e choose another data group and specify its location in II-block, and compute its persistence diagrams in III-block)
max_values <- read.csv(concat_path(saved_computations, "max-values.csv"))
max_birth <- max(max_values$maxbirth)
max_death <- max(max_values$maxdeath)
save_plot <- TRUE #set FALSE if you don't want to save plots
plot_PDs(group, max_birth, max_death, saved_computations, save_plot)
## [1] "Persistence diagrams plots are saved in ~/Documents/GitHub/TDA-Microscopy-Pipeline/TDA/general-pipeline/saved_computations/group_25"
#choose a persistence threshold
persist_threshold <- 20
save_plot <- TRUE #set FALSE if you don't want to save plots
#plot representative cycles with persistence above the chosen threshold
plot_representative_cycles(csv_dir_path, cell_types, persist_threshold, group, saved_computations, save_plot)
## [1] "Processing file 25_dox_1.csv"
## # Generated complex of size: 7557
## [1] "Processing file 25_dox_10.csv"
## # Generated complex of size: 5079
## [1] "Processing file 25_dox_11.csv"
## # Generated complex of size: 5561
## [1] "Processing file 25_dox_12.csv"
## # Generated complex of size: 7909
## [1] "Processing file 25_dox_13.csv"
## # Generated complex of size: 1883
## [1] "Processing file 25_dox_14.csv"
## # Generated complex of size: 2323
## [1] "Processing file 25_dox_15.csv"
## # Generated complex of size: 2851
## [1] "Processing file 25_dox_16.csv"
## # Generated complex of size: 4557
## [1] "Processing file 25_dox_2.csv"
## # Generated complex of size: 11789
## [1] "Processing file 25_dox_3.csv"
## # Generated complex of size: 12539
## [1] "Processing file 25_dox_4.csv"
## # Generated complex of size: 12547
## [1] "Processing file 25_dox_5.csv"
## # Generated complex of size: 4931
## [1] "Processing file 25_dox_6.csv"
## # Generated complex of size: 7313
## [1] "Processing file 25_dox_7.csv"
## # Generated complex of size: 8131
## [1] "Processing file 25_dox_8.csv"
## # Generated complex of size: 10765
## [1] "Processing file 25_dox_9.csv"
## # Generated complex of size: 3591
## [1] "Representative cycle are saved in ~/Documents/GitHub/TDA-Microscopy-Pipeline/TDA/general-pipeline/saved_computations/group_25"
You need to have persistence diagrams computed first
#choose a discretization step (use the same for all groups)
discr_step <- 0.3
save_computations_csv <- FALSE #set TRUE if additionally to saving persistence landscapes in RData format you want to save each of them in a separate csv file. This option is provided if, for example, you plan to use persistence landscapes for further analysis in another programming language. Note that in a csv file row k corresponds to a persistence landscape function at depth k, where depth 1 corresponds to a outermost persistence landscape function, and column names correspond to x-values.
compute_PLs(group, max_birth, max_death, discr_step, saved_computations, save_computations_csv)
## [1] "Persistence landscapes are saved in ~/Documents/GitHub/TDA-Microscopy-Pipeline/TDA/general-pipeline/saved_computations/group_25/group_25_PLs.RData"
save_plot <- TRUE #set FALSE if you don't want to save plots
plot_PLs(group, max_birth, max_death, discr_step, saved_computations, save_plot)
## [1] "Persistence landscape plots are saved in ~/Documents/GitHub/TDA-Microscopy-Pipeline/TDA/general-pipeline/saved_computations/group_25/group_25"
You need to have persistence landscapes computed first
compute_avgPL(group, max_birth, max_death, discr_step, saved_computations)
## [1] "Average persistence landscape is saved in ~/Documents/GitHub/TDA-Microscopy-Pipeline/TDA/general-pipeline/saved_computations/group_25/group_25_avgPL.RData"
save_plot <- TRUE #set FALSE if you don't want to save plots
plot_avgPL(group, max_birth, max_death, discr_step, saved_computations, save_plot)
## [1] "Average persistence landscape plot is saved in ~/Documents/GitHub/TDA-Microscopy-Pipeline/TDA/general-pipeline/saved_computations/group_25"
You need to have computed average persistence landscapes for both groups (make sure that they were plotted on the same scale)
save_plot <- TRUE #set FALSE if you don't want to save plots
#avgPL of groups[0] minus avgPL of groups[1]
plot_avgPLs_difference(groups, max_birth, max_death, discr_step, saved_computations, save_plot)
## [1] "Plot of the difference of two average persistence landscapes is saved in ~/Documents/GitHub/TDA-Microscopy-Pipeline/TDA/general-pipeline/saved_computations"
You need to have computed persistence landscapes for both groups
#number of permutations
num_repeats <- 10000
permutation_test_for_PLs(groups, saved_computations, num_repeats)
## [1] "p-value 0.0007"